set more off

*********************************
* Product HHIs
*********************************
* Builds a yearly sales-based Herfindahl index. Firm shares of sales are
* computed within each fiscal-year-by-sic cell, squared and summed, and the
* result is averaged to one value per fiscal year and saved.

* Forward slashes are used in paths because Stata accepts them on every
* platform, whereas backslashes only work on Windows.
use "$inputs/compustat.dta", clear

destring gvkey, replace
* force turns any non-numeric sic into missing instead of raising an error.
* NOTE(review): rows with missing sic survive the industry exclusions below
* (a missing value fails the upper-bound test) and end up as one by-group of
* their own. Confirm whether they should be dropped.
destring sic, force replace

* Apply missing data filters
drop if fyear==.
drop if at==.
drop if gvkey==.
drop if at<1

drop if gvkey == 4828 & fyear == 2001 // Severe data issue in shares outstanding

* Excluded industries (dropped here, based on sic)
drop if sic >= 4900 & sic <= 4999    // regulated utilities
drop if sic >= 6000 & sic <= 6799    // financials and RE

* Sample restrictions: positive, non-missing sales; USA firms reporting in USD
drop if sale==.
drop if sale<=0
keep if fic=="USA"
keep if curcd=="USD"

* NOTE(review): sich is backfilled from sic here, but every grouping below
* uses sic and sich is discarded by the collapse, so this line does not
* affect the saved output. Confirm whether sich was meant to define industries.
replace sich = sic if sich==.

* Industry-year sales totals and each firm's share of them
sort fyear sic
by fyear sic: egen total_sale = total(sale)
gen share = sale/total_sale

* HHI = sum of squared shares within the industry-year
* (total() is the documented egen function; sum() is its old synonym)
by fyear sic: egen hhi = total(share^2)

* Boundary years are removed only after the indices have been computed
drop if fyear==1979 | fyear==2018

* NOTE(review): the collapse runs on firm-level rows, so an industry's hhi
* enters once per firm, each time with weight total_sale. The effective
* industry weight is therefore (number of firms) x (industry sales). If a
* plain sales-weighted industry average was intended, confirm and adjust.
collapse (mean) hhi [aw=total_sale], by(fyear)
save "$inputs/hhis_compustat.dta", replace

*********************************
* Labor HHIs
*********************************
* Builds a yearly employment-based Herfindahl index. Firm shares of emp are
* computed within each fiscal-year-by-sic cell, squared and summed, and the
* result is averaged to one value per fiscal year and saved.

* Forward slashes are used in paths because Stata accepts them on every
* platform, whereas backslashes only work on Windows.
use "$inputs/compustat.dta", clear

destring gvkey, replace
* force turns any non-numeric sic into missing instead of raising an error.
* NOTE(review): rows with missing sic survive the industry exclusions below
* (a missing value fails the upper-bound test) and end up as one by-group of
* their own. Confirm whether they should be dropped.
destring sic, force replace

* Apply missing data filters (emp is required here because shares use it)
drop if fyear==.
drop if at==.
drop if gvkey==.
drop if at<1
drop if emp==.

drop if gvkey == 4828 & fyear == 2001 // Severe data issue in shares outstanding

* Excluded industries (dropped here, based on sic)
drop if sic >= 4900 & sic <= 4999    // regulated utilities
drop if sic >= 6000 & sic <= 6799    // financials and RE

* Sample restrictions: positive, non-missing sales; USA firms reporting in USD.
* The sales filters are kept so the sample definition is unchanged, even
* though sale itself is not used in the index below.
drop if sale==.
drop if sale<=0
keep if fic=="USA"
keep if curcd=="USD"

* NOTE(review): sich is backfilled from sic here, but every grouping below
* uses sic and sich is discarded by the collapse, so this line does not
* affect the saved output. Confirm whether sich was meant to define industries.
replace sich = sic if sich==.

* Industry-year employment totals and each firm's share of them
sort fyear sic
by fyear sic: egen total_emp = total(emp)
gen share = emp/total_emp

* HHI = sum of squared shares within the industry-year
* (total() is the documented egen function; sum() is its old synonym)
by fyear sic: egen hhi_labor = total(share^2)

* Boundary years are removed only after the indices have been computed
drop if fyear==1979 | fyear==2018

* NOTE(review): the collapse runs on firm-level rows, so an industry's index
* enters once per firm, each time with weight total_emp. The effective
* industry weight is therefore (number of firms) x (industry employment).
* If a plain employment-weighted industry average was intended, confirm and
* adjust.
collapse (mean) hhi_labor [aw=total_emp], by(fyear)
save "$inputs/hhis_compustat_emp.dta", replace

*********************************
* Capital HHIs
*********************************
* Builds one Herfindahl index per fiscal year from firm shares of total
* sales across ALL firms in that year (no industry dimension), and saves it.
* NOTE(review): despite the section title, the index below is computed from
* sale and no capital variable is used. Confirm that this is intended.

* Forward slashes are used in paths because Stata accepts them on every
* platform, whereas backslashes only work on Windows.
use "$inputs/compustat.dta", clear

destring gvkey, replace
* force turns any non-numeric sic into missing instead of raising an error.
destring sic, force replace

* Apply missing data filters
* NOTE(review): emp is required to be non-missing although it is not used in
* the index below; the filter is kept so the sample is unchanged.
drop if fyear==.
drop if at==.
drop if gvkey==.
drop if at<1
drop if emp==.

drop if gvkey == 4828 & fyear == 2001 // Severe data issue in shares outstanding

* Excluded industries (dropped here, based on sic)
drop if sic >= 4900 & sic <= 4999    // regulated utilities
drop if sic >= 6000 & sic <= 6799    // financials and RE

* Sample restrictions: positive, non-missing sales; USA firms reporting in USD
drop if sale==.
drop if sale<=0
keep if fic=="USA"
keep if curcd=="USD"

* NOTE(review): sich is backfilled from sic here but is not used afterwards
* and is discarded by the collapse, so this line does not affect the output.
replace sich = sic if sich==.

* Sorting by fyear (then sic) lets the by-fyear prefix below run as is
sort fyear sic

* Economy-wide sales total per year and each firm's share of it
by fyear: egen total_sale = total(sale)
gen share = sale/total_sale

* HHI = sum of squared shares within the year
* (total() is the documented egen function; sum() is its old synonym)
by fyear: egen hhi_capital = total(share^2)

* Boundary years are removed only after the index has been computed
drop if fyear==1979 | fyear==2018

* hhi_capital is constant within a year, so the mean just keeps one row per year
collapse (mean) hhi_capital, by(fyear)
save "$inputs/hhis_compustat_capital.dta", replace
